
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	
NOTE: the do-files 00_master_wage_imp, 01_split_episodes, 04_merge_basic_BHP, 
05_educ_broad, 06_wages_assessment_ceiling, 07_wages_marginal, 
08_wages_deflation, and 10_wages_imputation build on the do-files offered by 
Heiko Stüber, Wolfgang Dauth, Johann Eppelsheimer (see below) but were slightly 
adjusted to run the code on our sample of the BeH.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	
	Author(s): Heiko Stüber, Wolfgang Dauth, Johann Eppelsheimer
	
	Version: 1.0
	Created: 2022-11-24
	
	Note: 
	This do-file is part of the Stata do-file collection offered as a 
	supplement to Stüber, Dauth, and Eppelsheimer (2023): A Guide to Preparing 
	the Sample of Integrated Labour Market Biographies (SIAB, version 7519 v1) 
	for Scientific Analysis. Journal for Labour Market Research, 57:7.
	https://doi.org/10.1186/s12651-023-00335-w

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/



/*
capture log close
global logfile = 1 // set global to 0 if you do not want to open the following log-file
if(${logfile}==1) log using "$log/00_master_wage_imp.log", replace
dis "$S_DATE $S_TIME"
*/

********************************************************************************
* Load data and set observation period
********************************************************************************
* Set observation period. The BHP stacking loop below takes its first and
* last year from these two globals, so the period is changed only here.
global minYear = 1997 // begin of your observation period
global maxYear = 2019 // end of your observation period

* Enables/disables certain outputs (tables and figures) to inspect the data
global inspect = 0 // default: 0, set 1 if you want to produce outputs

* Prepare BHP data: stack the yearly BHP files into a single file that holds
* only betnr, ao_bula and the year (jahr) of the file each row came from.
* Only the two needed variables are read from disk.
use betnr ao_bula using "${data}/bhp_7519_m06_v2_${minYear}.dta", clear
gen jahr = ${minYear}
forvalues y = `=${minYear}+1'/${maxYear} {
	append using "${data}/bhp_7519_m06_v2_`y'.dta", keep(betnr ao_bula)
	* rows that were just appended have no jahr yet, so stamp them with the file year
	replace jahr = `y' if missing(jahr)
}
compress
* Forward slashes work on every platform Stata runs on.
save "${temp}/ao_bula.dta", replace



* Load BeH data
use "${temp}/BeH_large.dta", clear

* Rename to the variable names used from here on
* (presumably the names the imputation do-files called below expect; verify there).
rename year jahr
rename beh_ausbildung ausbildung_imp
rename pt teilzeit

* Close any log that is still open before the sub-do-files are run.
cap log close

* The steps below run in sequence on the data currently in memory.
* Do-file paths are quoted so that directories containing spaces work, and use
* forward slashes so that the script is not tied to Windows.

********************************************************************************
* 04) Merge basic BHP data
********************************************************************************
global logfile = 1 // set global to 0 if you do not want to log the following do-file
do "${imp}/04_merge_basic_BHP.do"


********************************************************************************
* 05) Create broader education groups
********************************************************************************
global logfile = 1 // set global to 0 if you do not want to log the following do-file
do "${imp}/05_educ_broad.do"


********************************************************************************
* 06) Add the contribution assessment ceiling
********************************************************************************
global logfile = 1 // set global to 0 if you do not want to log the following do-file
do "${imp}/06_wages_assessment_ceiling.do"


********************************************************************************
* 07) Add the marginal part-time income threshold and flag marginal wages
********************************************************************************
global logfile = 1 // set global to 0 if you do not want to log the following do-file
do "${imp}/07_wages_marginal.do"


********************************************************************************
* 08) Deflate wages, marginal part-time income threshold and contribution assessment ceiling
********************************************************************************
global logfile = 1 // set global to 0 if you do not want to log the following do-file
do "${imp}/08_wages_deflation.do"
* Snapshot of the data after deflation; it is erased again once step 10 has run.
* NOTE(review): presumably step 10 reads this file - confirm in 10_wages_imputation.do.
save "${temp}/imp_wage_orig.dta", replace

********************************************************************************
* 10) Impute wages (2-step procedure, based on Dustmann et al. (2009) and Card et al. (2013))
********************************************************************************
global logfile = 1 // set global to 0 if you do not want to log the following do-file
do "${imp}/10_wages_imputation.do"
clear
erase "${temp}/imp_wage_orig.dta"


*** Generate BeH.dta

* Reload the BeH data and swap the original wage for the one stored in
* imp_wage.dta; ao_bula is brought in from the same file.
use "${temp}/BeH_large", clear
drop tentgelt

merge 1:1 persnr spell using "${temp}/imp_wage.dta", keepusing(tentgelt ao_bula) // imputed wages
* Inspect the match result; unmatched rows from either side are kept.
tabulate _merge, missing
drop _merge

compress
* NOTE(review): this overwrites the file the script loaded as its input, so a
* second run starts from the already replaced tentgelt - confirm this is intended.
save "${temp}/BeH_large", replace

* Quick look at the resulting data
describe

summarize year
tabulate beh_pers_gr, missing
tabulate sex_id, missing
summarize tentgelt
tabulate pt, missing
summarize spell_length
tabulate year, missing
summarize age

* Slim version without the variables dropped here
drop sex_id beh_pers_gr age beh_ausbildung beh_beruf_num beh_staat_dummy

compress
save "${temp}/BeH.dta", replace // 1997-2019
clear

cap log close
